###########################################
#REPLICATION FILE FOR VOX POPULI, VOX DEI?
#by ADAM RAMEY
#SECTION 4 (incl. FIGURE 1)
###########################################



library(basicspace)
library(doParallel)
library(ggplot2)
library(plyr)

set.seed(1234)


montecarlo_wrapper <- function(nSim=100, nDist=50, nPerDist=100, stateChange=TRUE){
  
  ##total num of obs is no. of districts X respondents per district
  N <- nPerDist*nDist
  
  ##set the true party positions + obama
  dem.true <- -1
  rep.true <- 1
  obama.true <- -0.9
  
  ##house members and voters are normally distributed
  house.true <- rnorm(nDist,sd=1)
  voters.true <- rnorm(N)
  
  
  
  ##we generate the a and b parameters from A-M equation
  ##stateChange is an indicator for whether there are 
  ##common means for these parameters by state
  
  districtCounter <- rep(1:nDist,each=nPerDist)
  
  if (stateChange==TRUE){
    ##distortion parameters
    a_state <- rnorm(nDist)
    b_state <- rnorm(nDist)
    a <- rnorm(N,mean=a_state[districtCounter],sd=1)
    b <- rnorm(N,mean=b_state[districtCounter],sd=1)
  }else{
    ##distortion parameters
    a <- rnorm(N,mean=0,sd=1)
    b <- rnorm(N,mean=1,sd=1)  
  }  
  
  ##set up data matrix
  y <- matrix(NA,N,6)
  colnames(y) <- c("district","self","dem","rep","obama","MC")
  y[,1] <- districtCounter 
  
  
  am_montecarlo <- function(){
    
    ##set up storage
    house.sims <- matrix(NA,nDist)
    
    #put true parameters together
    X <- cbind(voters.true, dem.true, rep.true, obama.true, house.true[districtCounter])
    y[,2:6] <- a + b*X + rnorm(N)
    
    #do the national estimation
    natl <- aldmck(y[,2:5],polarity=2,respondent=1)$stim
    
    #renormalize the national scale to have rep at 1 and dem at -1
    slope <- 2/(natl["rep"]-natl["dem"])
    natl <- slope*natl - slope*natl["dem"] - 1 
    
    #now go through districts, run AM, and rescale
    for (j in 1:nDist){
      tmp <- y[y[,1]==j,]
      dist <- aldmck(tmp[,2:6],polarity=2,respondent=1)$stim
      slope <- (natl["rep"]-natl["dem"])/(dist["rep"]-dist["dem"])
      dist.rescale <- slope*dist["MC"] - slope*dist["dem"] + natl["dem"]
      house.sims[j] <- dist.rescale
    }
    
    return(house.sims)
  }
  
  ##we do the the A-M simlation nSim times
  registerDoParallel(cores=8)
  am_res <- foreach(i=1:nSim, .combine='cbind') %dopar% am_montecarlo()
  
  ##calculate the RMSE
  RMSE <- apply((am_res-house.true)^2, 1, function(x) sum(x^2,na.rm=TRUE)/length(x[!is.na(x)]))
  
  return(list("estimates"=apply(am_res,1,mean), "RMSE"=RMSE, "h_true"=house.true))

}

##run it for different sample sizes
res1 <- montecarlo_wrapper(nDist=20,nPerDist=50,nSim=100)
res2 <- montecarlo_wrapper(nDist=20,nPerDist=100,nSim=100)
res3 <- montecarlo_wrapper(nDist=20,nPerDist=250,nSim=100)
res4 <- montecarlo_wrapper(nDist=20,nPerDist=500,nSim=100)

##collapse results
res <- data.frame(rep(c(50,100,250,500), each=20),c(res1$RMSE,res2$RMSE,res3$RMSE,res4$RMSE))
colnames(res) <- c("SampleSize","RMSE")

##summarize results across iterations
mcdat <- ddply(res, c("SampleSize"), summarise,
                     M = mean(sqrt(RMSE), na.rm=T),
                     SE = sd(sqrt(RMSE), na.rm=T)/sqrt(length(na.omit(RMSE))),
                     N = length(na.omit(RMSE)))


##Replicate Figure 1
g <- ggplot(mcdat, aes(x = M, xmin = M-2*SE, xmax = M+2*SE, y = factor(SampleSize ))) +
  geom_point() + geom_segment( aes(x = M-SE, xend = M+SE,
                                   y = factor(SampleSize), yend=factor(SampleSize))) +
  theme_bw() + opts(axis.title.x = theme_text(size = 12, vjust = .25)) + 
  xlab("RMSE") + ylab("Sample Size") +
  coord_flip() +
  opts(title = expression("RMSE by Sample Size"))

res_pts <- data.frame(rep(c(50,100,250,500), each=20),
                   c(res1$est,res2$est,res3$est,res4$est),
                   c(res1$h,res2$h,res3$h,res4$h))
colnames(res_pts) <- c("SampleSize","Estimates","True")


g2 <- ggplot(res_pts, aes(Estimates,True)) +
  geom_point() + facet_wrap(~SampleSize) +
  theme_bw() + 
  opts(axis.title.x = theme_text(size = 12, vjust = .25)) + 
  xlab("Estimates") + ylab("True Parameters") +
  geom_abline(intercept=0,slope=1)

